{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "import torch.utils.data as Data\n",
    "torch.manual_seed(8) # for reproducibility\n",
    "\n",
    "import time\n",
    "import numpy as np\n",
    "import gc\n",
    "import sys\n",
    "sys.setrecursionlimit(50000)\n",
    "import pickle\n",
    "torch.backends.cudnn.benchmark = True\n",
    "torch.set_default_tensor_type('torch.cuda.FloatTensor')\n",
    "from tensorboardX import SummaryWriter\n",
    "torch.nn.Module.dump_patches = True\n",
    "import copy\n",
    "import pandas as pd\n",
    "# then import my own modules\n",
    "from AttentiveFP import Fingerprint, Fingerprint_viz, save_smiles_dicts, get_smiles_dicts, get_smiles_array, moltosvg_highlight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import roc_auc_score\n",
    "from sklearn.metrics import matthews_corrcoef\n",
    "from sklearn.metrics import recall_score\n",
    "from sklearn.metrics import accuracy_score\n",
    "from sklearn.metrics import r2_score\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.metrics import precision_score\n",
    "from sklearn.metrics import precision_recall_curve\n",
    "from sklearn.metrics import auc\n",
    "from sklearn.metrics import f1_score\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from rdkit.Chem import rdMolDescriptors, MolSurf\n",
    "# from rdkit.Chem.Draw import SimilarityMaps\n",
    "from rdkit import Chem\n",
    "# from rdkit.Chem import AllChem\n",
    "from rdkit.Chem import QED\n",
    "%matplotlib inline\n",
    "from numpy.polynomial.polynomial import polyfit\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.cm as cm\n",
    "import matplotlib\n",
    "from IPython.display import SVG, display\n",
    "import seaborn as sns; sns.set(color_codes=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of all smiles:  1427\n",
      "number of successfully processed smiles:  1427\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/pytorch/anaconda3/lib/python3.6/site-packages/matplotlib/axes/_axes.py:6462: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n",
      "  warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAU8AAAC/CAYAAAB+KF5fAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGxRJREFUeJzt3X9Q0/f9B/AnRihkIQhKu4r2WLD+BgEdVvklanVMb+eYVTx/kKuDUTHnhGrndVcrrrXOVqeNEaI7nLqJbnX2qsjZQzew0lmn3tzmtydGET21WoEQCL+S9/cP52eG6AIfMSbx+bjLXfP+vPLJ+0V6Tz6fzzt89BNCCBARUY/0edoTICLyRgxPIiIZGJ5ERDIwPImIZGB4EhHJwPAkIpKhb3eKDh8+jB07duDKlStQqVSYMGECCgoK8MILL0g1QggUFxdj7969qK+vR3R0NH75y19ixIgRDvuqqanB2rVrce7cOQQHB+O1117D0qVLoVAoXM7j9u2mHjUXGqpEfX1Lj17jydiP5/KlXoBns5/w8OAe7dPlkWdFRQXy8/MRFxcHg8GAN998E6dPn0Zubi7sdrtUZzQaYTAYkJ2djaKiIiiVSmi1Wty+fVuqaWxshFarhZ+fHwwGA/Ly8lBSUoItW7b0aNLd1bev60D2JuzHc/lSLwD76dY+XRUcOnQIo0aNwjvvvCONqVQqLFmyBJcvX0ZUVBTa2tpgNBqRk5ODBQsWAABiY2MxefJk7NmzB8uXLwcAlJaWoq2tDXq9HiqVComJibBYLNDr9cjOzoZKper1BomIngSXR56dnZ1OoaZWqwHcO1UHgDNnzsBisSA9PV2qUSqVSEtLQ1VVlTRWWVmJpKQkh/3NmDEDra2tOHXq1ON1QkTkRi7D8yc/+Qn+/ve/4+DBg7BYLLh8+TJ+85vfYPz48RgyZAgAwGQyQaFQIDIy0uG1UVFRMJlM0nOTyQSNRuNQM3DgQAQFBTnUERF5Open7ZMmTcK6devw9ttv46233gIAxMXFYdu2bVKN2WyGUql0WvQJCQmB1WpFe3s7AgICYDabERzsfFFWrVbDbDa7nGxoqLLH1y56ehHY07Efz+VLvQDsxxWX4fnll19i9erVWLRoEVJSUvDtt9/i448/Rl5eHnbu3CkFpp+fn9Nr75/WP7jtUXUPG++qp6t/4eHBPV6h92Tsx3P5Ui/As9lPT8PVZXiuX78ekydPxooVK6Sx4cOHIz09HRUVFZg2bRrUajWam5ths9kcjj7NZjOCgoLg7+8P4N4RZlOTcwMWi+WhR6Tu8pdz113WTIqNcMNMiMhbuLzmaTKZnL6rqdFoEBgYiKtXr0rPbTYbamtrnV774DVOjUbjdG3zxo0baGlpcboWSkTkyVyG58CBA/Hvf//bYezSpUtobW1FRMS9o7H4+HioVCqUl5dLNVarFcePH0dycrI0lpKSghMnTsBisUhjZWVlCAwMREJCwmM3Q0TkLi5P2zMzM7Fu3To8//zzSElJwZ07d7B161ZEREQgNTUVAPDcc88hJycHBoMBISEh0Gg0KCkpgd1ux8KFCx32tXv3buh0OmRnZ6Ourg56vR5arZbf8SQir+IyPBctWgR/f3/s3bsXpaWlCA4OxtixY1FQUAClUinV5eTkwG63o7i4GA0NDRg9ejRKSkowYMAAqSYkJAQ7d+5EYWEhcnNzoVarkZWVBZ1O92S6IyJ6Qvy86Z/h6OnqX3dXDL1lwehZXAH1Fr7UC/Bs9tPrf9tORETOGJ5ERDIwPImIZGB4EhHJwPAkIpKB4UlEJAPDk4hIBoYnEZEMDE8iIhkYnkREMjA8iYhkYHgSEcnA8CQikoHhSUQkA8OTiEgGhicRkQwMTyIiGRieREQyMDyJiGRgeBIRycDwJCKSgeFJRCQDw5OISAaGJxGRDAxPIiIZuhWenZ2dMBqNmDZtGkaPHo2UlBS8
//77DjVCCBQVFSE1NRUxMTGYP38+Lly44LSvmpoaZGVlYcyYMUhKSsLmzZths9l6pxsiIjfp252iVatWobq6GkuXLoVGo8GNGzdw6dIlhxqj0QiDwYCVK1dCo9GgpKQEWq0Whw4dQnh4OACgsbERWq0WQ4YMgcFgwNWrV7F+/XrY7XYsX76897sjInpCXIZnZWUlysrK8Omnn2LIkCEPrWlra4PRaEROTg4WLFgAAIiNjcXkyZOxZ88eKRhLS0vR1tYGvV4PlUqFxMREWCwW6PV6ZGdnQ6VS9WJrRERPjsvT9k8++QSvvPLKI4MTAM6cOQOLxYL09HRpTKlUIi0tDVVVVdJYZWUlkpKSHEJyxowZaG1txalTp+T2QETkdi7D8x//+AciIyNRWFiI+Ph4jBkzBkuXLsWtW7ekGpPJBIVCgcjISIfXRkVFwWQyOdRpNBqHmoEDByIoKMihjojI07k8bb99+zYOHDiA4cOHY9OmTWhubsaGDRuwdOlS7N+/H35+fjCbzVAqlVAoFA6vDQkJgdVqRXt7OwICAmA2mxEcHOz0Hmq1Gmaz2eVkQ0OV6NtX4bLuQeHhzu/XVbAqsFf24w6eMo/e4kv9+FIvAPtxpVsLRgBgMBgQGhr6n0mEY8GCBfjyyy8xYcIEAICfn5/Ta4QQTtseVfew8a7q61u6O93/zDMYt283uaxrsrS6rOnOfp607vbjLXypH1/qBXg2++lpuLo8bVer1Rg6dKgUnAAwduxY+Pv7o6amRqppbm52+sqR2WxGUFAQ/P39pbqmJucGLBbLQ49IiYg8lcvwjIqKevSL+9x7uUajgc1mQ21trcP2rtc4NRqN07XNGzduoKWlxelaKBGRJ3MZnpMmTcLXX3+Nu3fvSmNfffUVOjo6MGzYMABAfHw8VCoVysvLpRqr1Yrjx48jOTlZGktJScGJEydgsViksbKyMgQGBiIhIaFXGiIicgeX4Tl37lz069cPb7zxBo4dO4bPPvsMK1euxMSJEzFu3DgAwHPPPYecnBwUFxfj97//Paqrq7Fs2TLY7XYsXLhQ2ldmZiYCAgKg0+lw8uRJ7Nu3D3q9Hlqtlt/xJCKv4nLBSKVS4Xe/+x1+9atfIT8/H/7+/pgyZQpWrVrlUJeTkwO73Y7i4mI0NDRg9OjRKCkpwYABA6SakJAQ7Ny5E4WFhcjNzYVarUZWVhZ0Ol3vd0ZE9AT5iftL4l6gp6t/3V0x/Mu56y5rJsVG9Oi9n4RncQXUW/hSL8Cz2U+vr7YTEZEzhicRkQwMTyIiGRieREQyMDyJiGRgeBIRycDwJCKSgeFJRCQDw5OISAaGJxGRDAxPIiIZGJ5ERDIwPImIZGB4EhHJwPAkIpKB4UlEJAPDk4hIBoYnEZEMDE8iIhkYnkREMjA8iYhkYHgSEcnA8CQikoHhSUQkQ4/D89atW4iLi8OwYcPQ3NwsjQshUFRUhNTUVMTExGD+/Pm4cOGC0+tramqQlZWFMWPGICkpCZs3b4bNZnu8LoiI3KzH4fnrX/8aSqXSadxoNMJgMCA7OxtFRUVQKpXQarW4ffu2VNPY2AitVgs/Pz8YDAbk5eWhpKQEW7ZsebwuiIjcrEfhefr0aVRVVeH11193GG9ra4PRaEROTg4WLFiAiRMnYvPmzfDz88OePXukutLSUrS1tUGv1yMxMRHz5s1DXl4edu7cCYvF0jsdERG5QbfD02azYe3atViyZAlCQ0Mdtp05cwYWiwXp6enSmFKpRFpaGqqqqqSxyspKJCUlQaVSSWMzZsxAa2srTp069Th9EBG5VbfD8/5R4/z58522mUwmKBQKREZGOoxHRUXBZDI51Gk0GoeagQMHIigoyKGOiMjT9e1OUX19PTZv3owNGzbA39/fabvZbIZSqYRCoXAYDwkJgdVqRXt7OwICAmA2mxEcHOz0erVaDbPZ7HIeoaFK9O2rcFn3oPBw5/frKlgV2Cv7cQdPmUdv8aV+fKkXgP240q3w
3LRpE2JiYpCamvrIGj8/P6cxIYTTtkfVPWy8q/r6lu5MVxIeHozbt5tc1jVZWl3WdGc/T1p3+/EWvtSPL/UCPJv99DRcXYbnxYsXceDAAezZs0c6OrRarQAAi8UChUIBtVqN5uZm2Gw2h6NPs9mMoKAg6WhVrVajqcm5AYvF8tAj0t7wl3PXn8h+iejZ5jI8a2tr0dHRgblz5zptS0lJwezZszFz5kzYbDbU1tY6XNPseo1To9E4Xdu8ceMGWlpanK6FEhF5MpfhGR8fj127djmMVVVVYfv27TAajRg8eDAiIiKgUqlQXl6OJUuWALh3dHr8+HHMmTNHel1KSgp++9vfwmKxSCvuZWVlCAwMREJCQm/2RUT0RLkMz7CwMIwfP95h7Pr1e6fC48aNw3e+8x0AQE5ODgwGA0JCQqDRaFBSUgK73Y6FCxdKr8vMzMTu3buh0+mQnZ2Nuro66PV6aLVah68vERF5um4tGHVHTk4O7HY7iouL0dDQgNGjR6OkpAQDBgyQakJCQrBz504UFhYiNzcXarUaWVlZ0Ol0vTUNIiK38BP3l8S9QE9X/8LDg/HHz/+vV957UmxEr+zncTyLK6Dewpd6AZ7Nfnq62s67KhERycDwJCKSgeFJRCQDw5OISAaGJxGRDAxPIiIZGJ5ERDIwPImIZGB4EhHJwPAkIpKB4UlEJAPDk4hIBoYnEZEMDE8iIhkYnkREMjA8iYhkYHgSEcnA8CQikoHhSUQkA8OTiEgGhicRkQwMTyIiGRieREQyMDyJiGRwGZ5HjhxBbm4ukpOTERcXh4yMDBw6dMipbv/+/Zg2bRqio6ORkZGB6upqp5pbt24hLy8PcXFxGD9+PAoLC2G1WnunEyIiN+rrqmDnzp0YNGgQVq1ahdDQUFRWVqKgoAD19fVYuHAhAODw4cNYvXo1li5dirFjx+LAgQP42c9+hj/96U8YOnQoAKCzsxOLFy+Gv78/Nm3aBLPZjA8++ABmsxkffvjhk+2SiKiXuQzPbdu2ISwsTHo+YcIEfPPNNygpKZHCc8uWLZg1axby8vIAAAkJCbhw4QKMRqMUjOXl5bh06RKOHj2KwYMH33vzvn2Rn5+PpUuXIjIysrd7IyJ6Ylyetj8YnPeNGDECd+/eBQDU1dXhypUrSE9P/+9O+/TB9OnTUVVVJY1VVlYiOjpaCk4AmDp1Kvz9/R3qiIi8gawFo7NnzyIqKgoAYDKZAAAajcahJioqCg0NDVLImkwmp5qAgAC89NJL0j6IiLyFy9P2rqqrq1FRUYH3338fANDY2AgAUKvVDnUhISHS9rCwMJjNZgQHBzvtT61Ww2w2d+u9Q0OV6NtX0aP5BqsCe1T/KOHhznN/GjxlHr3Fl/rxpV4A9uNKj8Lz2rVrKCgowJQpU5CRkeGwzc/Pz+G5EMJpvGvNg3XdUV/f0pPpIjw8GE2W1h695lFu327qlf08jvDwYI+YR2/xpX58qRfg2eynp+Ha7dP2hoYGZGdn48UXX8SGDRuk8ftHmF2PHu8/v39Eqlar0dTkPPmmpiano1YiIk/XrfC0Wq3Izc1FR0cHjEYjlEqltO3+dcyu1y1NJhP69esnLThpNBqnmvb2dtTV1TldCyUi8nQuw7OzsxPLli3DlStXsH37dvTv399h++DBgxEZGYny8nJpzG63o7y8HMnJydJYSkoKzp8/j+vXr0tjx44dQ3t7u0MdEZE3cHnNc82aNfjrX/+Kt99+G42NjTh37py0beTIkQgICIBOp8OKFSsQERGB+Ph4HDx4ELW1tfjoo4+k2unTp6OoqAg6nQ7Lli1DU1MT1q1bh5kzZ/I7nkTkdVyG5xdffAEAeO+995y2VVRUYNCgQZg5cyZaWlqwfft2GAwGvPzyyyguLpb+uggA/P39sWPHDhQWFuLnP/85AgIC8MMf/hArV67sxXaIiNzDT/Rkufsp6+nqX3h4MP74+f/1yntPio3olf08jmdxBdRb+FIvwLPZzxNb
bSciov9ieBIRycDwJCKSgeFJRCQDw5OISAaGJxGRDAxPIiIZenxLumfVX85dd1njCd8FJSL34JEnEZEMDE8iIhkYnkREMjA8iYhkYHgSEcnA8CQikoHhSUQkA8OTiEgGhicRkQwMTyIiGRieREQyMDyJiGRgeBIRycC7KvUi3nmJ6NnBI08iIhkYnkREMjA8iYhkcHt41tTUICsrC2PGjEFSUhI2b94Mm83m7mkQET0Wty4YNTY2QqvVYsiQITAYDLh69SrWr18Pu92O5cuXu3MqT013FpUeJVgViCZLKwAuPBE9bW4Nz9LSUrS1tUGv10OlUiExMREWiwV6vR7Z2dlQqVTunA4RkWxuDc/KykokJSU5hOSMGTPw4Ycf4tSpU5g8ebI7p+PV+LUooqfLreFpMpnwyiuvOIwNHDgQQUFBMJlMDM9e9jiXCB7k7hB257x7671ee3V4r7yXp/3C6+7Px53z9pSfo58QQjzxd/mPUaNGYcWKFdBqtQ7jKSkpmDVrFvLz8901FSKix+L21XY/Pz+nMSHEQ8eJiDyVW8NTrVajqanJadxisSA4ONidUyEieixuDU+NRgOTyeQwduPGDbS0tECj0bhzKkREj8Wt4ZmSkoITJ07AYrFIY2VlZQgMDERCQoI7p0JE9FjcGp6ZmZkICAiATqfDyZMnsW/fPuj1emi1Wn7Hk4i8iltX24F7f55ZWFiIc+fOQa1WY/bs2dDpdFAoFO6cBhHRY3F7eBIR+QKfu6uSt9x4pLa2Fu+88w5+9KMfYcSIEVi4cKFTjRACRUVFSE1NRUxMDObPn48LFy441T3tno8cOYLc3FwkJycjLi4OGRkZOHTokFPd/v37MW3aNERHRyMjIwPV1dVONbdu3UJeXh7i4uIwfvx4FBYWwmq1uqMNSXl5OTIzMzF+/HhER0dj+vTpMBgMaG9vl2q85bN5mFu3biEuLg7Dhg1Dc3OzNO4tPR04cADDhg1zeuzdu9e9vQgf0tDQIBITE0VWVpY4ceKE+MMf/iDGjBkjNm7c+LSn5uTzzz8XKSkpQqfTiR/84AdiwYIFTjVFRUUiOjpa7N69W3zxxRfipz/9qUhISBDffPONVOMJPc+ZM0fk5+eLw4cPi5MnT4oPPvhADB06VOzatUuqOXTokBg+fLjQ6/WiurparFixQkRHR4uvv/5aquno6BAzZswQs2bNEsePHxeffvqpmDBhgigoKHBbL0IIsXfvXrFx40Zx9OhRUV1dLYqLi0V0dLRYs2aNVOMtn83D5Ofni4kTJ4qhQ4cKi8UijXtLT5988okYOnSoqK6uFmfPnpUed+7ccWsvPhWeRUVFYty4caKpqUkaMxqNIiYmxmHME9hsNum/dTqdU3i2traK+Ph48fHHH0tjzc3NYvz48Q4frif0/O233zqN5efni7S0NOn5tGnTxC9+8Qvpuc1mEzNnznQIxs8++0wMHz5cXL16VRo7fPiwGDZsmLh8+fKTmXw3bdy4UYwdO1bY7Xav+my6+uqrr8T3v/99sWPHDofw9Kae7ofng8H/IHf14lOn7Y+68UhraytOnTr1FGfmrE+f//2jP3PmDCwWC9LT06UxpVKJtLQ0VFVVSWOe0HNYWJjT2IgRI3D37l0AQF1dHa5cueLQS58+fTB9+nSnXqKjozF48GBpbOrUqfD393eoexr69euHjo4OAN712TzIZrNh7dq1WLJkCUJDQx22eWtPD+OuXnwqPE0mk9OX7R+88Yg3MZlMUCgUiIyMdBiPiopy6MVTez579iyioqIAQJpH13lGRUWhoaFBCtmH9RIQEICXXnrpqfRis9lgtVpx+vRp7N69G/PmzYOfn5/Xfjb3bwk5f/58p23e2NOrr76KkSNHYvr06SgtLXWYozt68al/PdNsNj/0zzzVajXMZvNTmJF8ZrMZSqXS6StcISEhsFqtaG9vR0BAgEf2XF1djYqKCrz//vsA7t0E+/6cHhQSEiJtDwsL87heYmNj
pUWiWbNmYeXKlQC887Opr6/H5s2bsWHDBvj7+ztt96aewsPDsWzZMsTExMBms+Hw4cNYvXo1WltbodVq3daLT4Un4Fs3HnlUL123eVLP165dQ0FBAaZMmYKMjAyHbV3n05NenobS0lJYrVacP38eW7duRWFhId59910A3vfZbNq0CTExMUhNTX1kjbf0lJycjOTkZOl5amoq2tvbsW3bNixatOh/zrHrtsfpxafC05duPKJWq9Hc3AybzebwG9RsNiMoKEg6evCknhsaGpCdnY0XX3wRGzZskMbvH2F2/U1//7f7/SPSR/XS1NTkdNTqDqNGjQIAjBs3DqGhoXjrrbfw+uuve91nc/HiRRw4cAB79uyRfub3v/5lsVigUCi8rqeupk+fjiNHjuD69etu68WnwtOXbjyi0Whgs9lQW1vrMPeu12k8pWer1Yrc3Fx0dHTAaDRCqVQ6zBG4N/eIiP/epNZkMqFfv37SgtPDemlvb0ddXR0yMzPd0MWjjRw5EsC9I2tv+2xqa2vR0dGBuXPnOm1LSUnB7NmzMXPmTK/q6X9x1+fjUwtGvnTjkfj4eKhUKpSXl0tjVqsVx48fdzhl8YSeOzs7sWzZMly5cgXbt29H//79HbYPHjwYkZGRDr3Y7XaUl5c79XL+/Hlcv/7fO4UfO3YM7e3tDnVPw5kzZwAAgwYN8qrPBrj3/9KuXbscHtnZ2QAAo9GIxYsXe11PXR09ehShoaGIiIhwWy+Kd+9fxPEBL7/8Mvbt24e//e1veP7553Hy5Els3LgRWVlZ//Naz9NgtVpRUVGBmpoanDhxAo2Njejfvz9qamoQERGBwMBAAEBRUZF0GrJu3TrcvHkT69evl47sPKHn1atXo6ysDAUFBejXrx9u3rwpPcLCwqBQKBAaGootW7agT58+sNls2Lp1K06fPo3169dLYfu9730PR48exdGjR/Hd734X//znP/Hee+9hypQpmDNnjlt6AYDFixfjzp07aGpqwrVr13Dw4EHo9Xq8+uqrmDt3Lvr2vXfC5g2fDQAEBQVh0KBBDo+bN2+ioqICa9aswQsvvOBVPel0Oly7dg0WiwWXL1+GwWDAoUOH8OabbyI2NtZ9vfT0C6qe7uLFi2LhwoUiOjpaJCYmik2bNonOzs6nPS0ndXV1YujQoQ991NXVCSGEsNvtwmAwiOTkZBEdHS3mzZsn/vWvfznt62n3nJaW5rIXIYTYt2+fmDp1qhg1apSYNWuWOHnypNO+bty4Id544w0RGxsrEhISxLvvvitaWlrc1osQQmzatEnMmDFDxMbGirFjx4pZs2aJXbt2ifb2dqnGWz6bR3nYF829paePPvpITJs2TcTExIjo6Gjx4x//WPz5z392qHFHL7wxCBGRDD51zZOIyF0YnkREMjA8iYhkYHgSEcnA8CQikoHhSUQkA8OTiEgGhicRkQz/D8lKZx3nFoIxAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 360x216 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "task_name = 'sider'\n",
    "# Task names are SIDER1 ... SIDER27.\n",
    "tasks = ['SIDER' + str(task_idx) for task_idx in range(1, 28)]\n",
    "\n",
    "# File names derived from the location of the raw CSV.\n",
    "raw_filename = \"../data/sider.csv\"\n",
    "filename = raw_filename.replace('.csv', '')\n",
    "feature_filename = raw_filename.replace('.csv', '.pickle')\n",
    "prefix_filename = filename.split('/')[-1]\n",
    "\n",
    "# Read the table and take the SMILES column as an array.\n",
    "smiles_tasks_df = pd.read_csv(raw_filename)\n",
    "smilesList = smiles_tasks_df['smiles'].values\n",
    "print(\"number of all smiles: \", len(smilesList))\n",
    "# Parse every SMILES once with RDKit and keep only those that parse and canonicalise.\n",
    "atom_num_dist = []          # atom count of each kept molecule\n",
    "remained_smiles = []        # original SMILES strings that were kept\n",
    "canonical_smiles_list = []  # canonical isomeric SMILES, same order as remained_smiles\n",
    "for smiles in smilesList:\n",
    "    try:\n",
    "        mol = Chem.MolFromSmiles(smiles)\n",
    "        if mol is None:\n",
    "            # RDKit reports an unparsable SMILES by returning None rather than raising.\n",
    "            raise ValueError('RDKit could not parse SMILES')\n",
    "        canonical_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
    "    except Exception:\n",
    "        # Exception instead of a bare except, so KeyboardInterrupt/SystemExit still propagate.\n",
    "        print(\"not successfully processed smiles: \", smiles)\n",
    "        continue\n",
    "    # All three lists are appended together so they always stay the same length.\n",
    "    atom_num_dist.append(mol.GetNumAtoms())\n",
    "    remained_smiles.append(smiles)\n",
    "    canonical_smiles_list.append(canonical_smiles)\n",
    "print(\"number of successfully processed smiles: \", len(remained_smiles))\n",
    "\n",
    "# .copy() gives an independent frame, so adding a column below does not raise\n",
    "# SettingWithCopyWarning when some rows were filtered out.\n",
    "smiles_tasks_df = smiles_tasks_df[smiles_tasks_df[\"smiles\"].isin(remained_smiles)].copy()\n",
    "# print(smiles_tasks_df)\n",
    "smiles_tasks_df['cano_smiles'] = canonical_smiles_list\n",
    "\n",
    "# Histogram of the atom counts collected in atom_num_dist.\n",
    "sns.set(font_scale=1.5)\n",
    "fig, ax = plt.subplots(figsize=(5, 3))\n",
    "sns.distplot(atom_num_dist, bins=28, kde=False, ax=ax)\n",
    "# Label the axes so the figure can be read without the surrounding code.\n",
    "ax.set_xlabel('atoms per molecule')\n",
    "ax.set_ylabel('number of molecules')\n",
    "fig.tight_layout()\n",
    "# fig.savefig(\"atom_num_dist_\"+prefix_filename+\".png\",dpi=200)\n",
    "plt.show()\n",
    "# Close this specific figure rather than whichever one happens to be current.\n",
    "plt.close(fig)\n",
    "\n",
    "# print(len([i for i in atom_num_dist if i<51]),len([i for i in atom_num_dist if i>50]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- run bookkeeping ---\n",
    "random_seed = 68\n",
    "# Timestamp string with ':' and ' ' replaced.\n",
    "start_time = time.ctime().replace(' ', '_').replace(':', '-')\n",
    "start = time.time()\n",
    "\n",
    "# --- training settings ---\n",
    "batch_size = 100\n",
    "epochs = 800\n",
    "p_dropout = 0.5\n",
    "\n",
    "# --- model settings (consumed outside this cell) ---\n",
    "fingerprint_dim = 200\n",
    "radius = 3\n",
    "T = 3\n",
    "\n",
    "# --- optimiser settings ---\n",
    "# NOTE(review): taken literally these values would be unusually large; presumably they are\n",
    "# used downstream as negative base-10 exponents (10**-x) -- confirm where the optimiser is built.\n",
    "weight_decay = 3 # also known as l2_regularization_lambda\n",
    "learning_rate = 3.5\n",
    "\n",
    "# --- output layer size: two units per task ---\n",
    "per_task_output_units_num = 2 # for classification model with 2 classes\n",
    "output_units_num = per_task_output_units_num * len(tasks)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>SIDER1</th>\n",
       "      <th>SIDER2</th>\n",
       "      <th>SIDER3</th>\n",
       "      <th>SIDER4</th>\n",
       "      <th>SIDER5</th>\n",
       "      <th>SIDER6</th>\n",
       "      <th>SIDER7</th>\n",
       "      <th>SIDER8</th>\n",
       "      <th>SIDER9</th>\n",
       "      <th>SIDER10</th>\n",
       "      <th>...</th>\n",
       "      <th>SIDER20</th>\n",
       "      <th>SIDER21</th>\n",
       "      <th>SIDER22</th>\n",
       "      <th>SIDER23</th>\n",
       "      <th>SIDER24</th>\n",
       "      <th>SIDER25</th>\n",
       "      <th>SIDER26</th>\n",
       "      <th>SIDER27</th>\n",
       "      <th>smiles</th>\n",
       "      <th>cano_smiles</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@H]1C(=O)N[C@H]2CSSC[C@@H](C(=O)N[...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)CN)C(=O)N[C@H](C(=O)N[C@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@H]...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>N</td>\n",
       "      <td>N</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CC1=CC=C(C=C1)O)...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>I</td>\n",
       "      <td>I</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>[Ca+2]</td>\n",
       "      <td>[Ca+2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>C</td>\n",
       "      <td>C</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Cl-]</td>\n",
       "      <td>[Cl-]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[K+]</td>\n",
       "      <td>[K+]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Mg+2]</td>\n",
       "      <td>[Mg+2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Na+]</td>\n",
       "      <td>[Na+]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[OH-]</td>\n",
       "      <td>[OH-]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>FS(F)(F)(F)(F)F</td>\n",
       "      <td>FS(F)(F)(F)(F)F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>182</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>C[C@H]([C@@H](C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H]...</td>\n",
       "      <td>CSCC[C@H](N)C(=O)N[C@@H](CCC(N)=O)C(=O)N[C@@H]...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>189</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H]([C@@H](C)O)C(=O)...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>197</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N...</td>\n",
       "      <td>CC[C@H](C)[C@@H]1NC(=O)[C@H](CCCNC(=N)N)NC(=O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>201</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@@H](Cc1ccccc1)NC(=O)[...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>207</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[H-].[H-].[Ba+2]</td>\n",
       "      <td>[Ba+2].[H-].[H-]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>215</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[H-].[H-].[Sr+2]</td>\n",
       "      <td>[H-].[H-].[Sr+2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[As]</td>\n",
       "      <td>[As]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>291</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>C[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](CC(...</td>\n",
       "      <td>CSCC[C@H](NC(=O)[C@H](CO)NC(=O)[C@H](Cc1ccc(O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>294</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>C[C@H]([C@@H](C(=O)N[C@@H](CC1=CC=CC=C1)C(=O)N...</td>\n",
       "      <td>CSCC[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@H](Cc1c[...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>309</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CO)C(=O)N[C@@H](...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](CC(=O)O)NC(=O)[C@H...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>334</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](C(C)C)C(=O)N[C@@...</td>\n",
       "      <td>CC(C)C[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCC(=O)O...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>336</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CC(C)C)C(=O)NCC(...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](CS)NC(=O)[C@H](CCC...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>338</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](CC(C)C)C(=O)N1CC...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@@H]1CCCN1C(=O)CNC(=O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>424</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Se]</td>\n",
       "      <td>[Se]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>627</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@@H](C(=O)...</td>\n",
       "      <td>CC[C@H](C)[C@@H]1NC(=O)[C@H](CCCNC(=N)N)NC(=O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>723</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>C[C@H]([C@@H](C(=O)N[C@@H](CC1=CC=CC=C1)C(=O)N...</td>\n",
       "      <td>CC(=O)O.CC(C)C[C@H](NC(=O)CNC(=O)[C@H](CCC(N)=...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>740</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Ra]</td>\n",
       "      <td>[Ra]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>745</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Li+].[OH-]</td>\n",
       "      <td>[Li+].[OH-]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CCCCCCCCCCCCCCCC(=O)N[C@@H](CCC(=O)NCCCC[C@@H]...</td>\n",
       "      <td>CCCCCCCCCCCCCCCC(=O)N[C@@H](CCC(=O)NCCCC[C@H](...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1058</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Na+].[I-]</td>\n",
       "      <td>[I-].[Na+]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1120</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC/C=C/CC(=O)N[C@@H](CC1=CC=C(C=C1)O)C(=O)N[C@...</td>\n",
       "      <td>CC/C=C/CC(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H]...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1223</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N[C@@H](C(C)C)C(=O)N[C@@...</td>\n",
       "      <td>CC(C)C[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCC(=O)O...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1243</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>[O-2].[O-2].[O-2].[249Cf].[249Cf]</td>\n",
       "      <td>[249Cf].[249Cf].[O-2].[O-2].[O-2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1297</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[O-2].[O-2].[O-2].[As+3].[As+3]</td>\n",
       "      <td>[As+3].[As+3].[O-2].[O-2].[O-2]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1300</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>F</td>\n",
       "      <td>F</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1311</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Ga+3]</td>\n",
       "      <td>[Ga+3]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1333</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CCCCCCCCCCCCCC(=O)NCCCC[C@@H](C(=O)O)NC(=O)[C@...</td>\n",
       "      <td>CCCCCCCCCCCCCC(=O)NCCCC[C@H](NC(=O)[C@@H]1CCCN...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1352</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Fe]</td>\n",
       "      <td>[Fe]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1353</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>[La]</td>\n",
       "      <td>[La]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1354</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>[Sm]</td>\n",
       "      <td>[Sm]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1355</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>[Ag]</td>\n",
       "      <td>[Ag]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1356</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Tc]</td>\n",
       "      <td>[Tc]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1361</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Cr]</td>\n",
       "      <td>[Cr]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1363</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Cu]</td>\n",
       "      <td>[Cu]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1364</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>[Gd]</td>\n",
       "      <td>[Gd]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1365</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Y]</td>\n",
       "      <td>[Y]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1366</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>[Zn]</td>\n",
       "      <td>[Zn]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1381</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(CSSCC(C(=O)NC(...</td>\n",
       "      <td>CC(C)CC(NC(=O)C(CCCCN)NC(=O)CNC(=O)C(CC(C)C)NC...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1382</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC(C)[C@@H](C(=O)NCC(=O)N[C@H](CCCCN)C(=O)N[C@...</td>\n",
       "      <td>CSCC[C@H](NC(=O)[C@H](CO)NC(=O)[C@H](Cc1ccc(O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1388</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC(C)CC(C(=O)NC(C(C)C)C(=O)O)NC(=O)CNC(=O)C(CC...</td>\n",
       "      <td>CC(C)CC(NC(=O)CNC(=O)C(CCC(N)=O)NC(=O)C(CC(C)C...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1390</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@H]1C(=O)N[C@H]2CSSC[C@@H](C(=O)N[...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)CN)C(=O)N[C@H](C(=O)N[C@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1391</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>CC1=CN(C(=O)NC1=O)[C@H]2C[C@@H]([C@H](O2)COP(=...</td>\n",
       "      <td>COCCO[C@H]1[C@@H](O)[C@H](COP(=O)([O-])S[C@H]2...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1392</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CCC(C)C(C(=O)NC(CCC(=O)N)C(=O)NC(CC(C)C)C(=O)N...</td>\n",
       "      <td>CCC(C)C(NC(=O)C(CCC(=O)O)NC(=O)C(CO)NC(=O)C(NC...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1393</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC1C(=O)NC(C(=O)NC2CSSCC3C(=O)NC(C(=O)NC(C(=O)...</td>\n",
       "      <td>CSCCC1NC(=O)C(CC(C)C)NC(=O)C(CCCNC(=N)N)NC(=O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1396</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)NCC(=O)N[C@H]...</td>\n",
       "      <td>CC[C@H](C)[C@@H]1NC(=O)[C@@H]2CSSC[C@H](NC(=O)...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1397</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>CC[C@H](C)[C@@H](C(=O)N1CCC[C@H]1C(=O)N[C@@H](...</td>\n",
       "      <td>CC[C@H](C)[C@H](NC(=O)[C@H](CCC(=O)O)NC(=O)[C@...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>61 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      SIDER1  SIDER2  SIDER3  SIDER4  SIDER5  SIDER6  SIDER7  SIDER8  SIDER9  \\\n",
       "5          0       1       0       1       1       1       1       0       1   \n",
       "29         1       1       0       1       1       1       1       1       1   \n",
       "41         0       1       0       0       1       0       0       0       0   \n",
       "47         0       0       0       0       1       0       1       0       1   \n",
       "50         0       0       0       1       1       1       1       0       1   \n",
       "51         0       1       0       0       0       0       1       0       1   \n",
       "53         0       1       0       0       0       0       1       0       0   \n",
       "59         0       1       0       0       1       0       1       0       0   \n",
       "126        0       1       0       0       1       0       1       0       0   \n",
       "136        0       1       0       0       1       0       0       0       0   \n",
       "139        0       1       0       1       1       0       1       0       1   \n",
       "148        0       1       0       0       1       0       0       0       0   \n",
       "149        0       0       0       1       1       1       1       0       1   \n",
       "162        0       1       0       0       0       1       1       0       1   \n",
       "182        0       1       0       1       1       1       1       0       1   \n",
       "189        1       1       0       0       1       1       1       0       0   \n",
       "197        1       1       0       0       1       1       1       0       1   \n",
       "201        0       1       0       0       1       1       1       0       1   \n",
       "207        0       0       0       0       0       0       1       0       0   \n",
       "215        0       0       0       0       1       0       0       0       0   \n",
       "251        0       1       0       1       1       1       1       0       1   \n",
       "291        0       1       0       1       1       1       1       0       1   \n",
       "294        0       0       0       0       1       0       1       0       1   \n",
       "309        0       0       0       0       0       0       0       0       0   \n",
       "334        0       1       0       1       1       1       1       0       1   \n",
       "336        1       1       0       0       1       0       1       0       1   \n",
       "338        0       1       0       0       0       1       1       0       1   \n",
       "424        0       0       0       0       0       0       0       0       0   \n",
       "627        0       1       0       0       1       1       1       0       1   \n",
       "723        0       0       0       0       0       0       1       0       0   \n",
       "...      ...     ...     ...     ...     ...     ...     ...     ...     ...   \n",
       "740        0       1       0       0       0       0       1       0       0   \n",
       "745        0       1       0       1       1       1       1       0       0   \n",
       "977        0       1       0       0       1       0       1       0       1   \n",
       "1058       0       0       0       0       0       0       1       0       0   \n",
       "1120       0       1       0       0       1       1       1       0       0   \n",
       "1223       0       1       0       1       1       1       1       1       1   \n",
       "1243       0       1       0       1       0       1       1       0       1   \n",
       "1297       1       1       0       1       1       1       1       0       1   \n",
       "1300       0       0       0       0       0       0       0       0       1   \n",
       "1311       0       1       0       1       1       0       1       0       1   \n",
       "1333       1       1       0       1       1       1       1       1       1   \n",
       "1352       0       1       0       1       1       1       1       0       1   \n",
       "1353       0       1       0       0       0       0       1       0       0   \n",
       "1354       0       0       0       0       1       1       1       0       1   \n",
       "1355       0       0       0       0       0       1       0       1       0   \n",
       "1356       0       0       0       0       1       1       1       0       1   \n",
       "1361       1       0       0       0       0       0       1       0       0   \n",
       "1363       0       0       0       0       1       1       0       0       0   \n",
       "1364       0       1       0       1       1       1       1       0       1   \n",
       "1365       0       0       0       0       0       0       0       0       0   \n",
       "1366       0       0       0       0       0       0       1       0       0   \n",
       "1381       1       1       0       1       1       1       1       0       1   \n",
       "1382       0       1       0       1       1       1       1       0       1   \n",
       "1388       0       0       0       0       1       0       1       0       0   \n",
       "1390       0       1       0       1       1       1       1       1       1   \n",
       "1391       1       0       0       0       1       1       1       0       1   \n",
       "1392       0       1       0       0       1       1       1       0       1   \n",
       "1393       0       1       0       1       1       1       1       1       1   \n",
       "1396       0       1       0       0       1       1       1       0       1   \n",
       "1397       0       0       1       1       1       1       1       0       1   \n",
       "\n",
       "      SIDER10                        ...                          SIDER20  \\\n",
       "5           0                        ...                                1   \n",
       "29          0                        ...                                1   \n",
       "41          0                        ...                                0   \n",
       "47          0                        ...                                0   \n",
       "50          1                        ...                                1   \n",
       "51          0                        ...                                0   \n",
       "53          0                        ...                                1   \n",
       "59          0                        ...                                0   \n",
       "126         0                        ...                                1   \n",
       "136         0                        ...                                1   \n",
       "139         0                        ...                                1   \n",
       "148         0                        ...                                1   \n",
       "149         0                        ...                                1   \n",
       "162         0                        ...                                0   \n",
       "182         0                        ...                                1   \n",
       "189         0                        ...                                1   \n",
       "197         0                        ...                                1   \n",
       "201         1                        ...                                1   \n",
       "207         0                        ...                                1   \n",
       "215         0                        ...                                0   \n",
       "251         1                        ...                                1   \n",
       "291         1                        ...                                1   \n",
       "294         0                        ...                                0   \n",
       "309         0                        ...                                1   \n",
       "334         1                        ...                                1   \n",
       "336         1                        ...                                1   \n",
       "338         0                        ...                                1   \n",
       "424         0                        ...                                0   \n",
       "627         0                        ...                                1   \n",
       "723         0                        ...                                0   \n",
       "...       ...                        ...                              ...   \n",
       "740         0                        ...                                0   \n",
       "745         1                        ...                                0   \n",
       "977         0                        ...                                1   \n",
       "1058        0                        ...                                0   \n",
       "1120        1                        ...                                0   \n",
       "1223        1                        ...                                1   \n",
       "1243        0                        ...                                1   \n",
       "1297        1                        ...                                1   \n",
       "1300        0                        ...                                0   \n",
       "1311        0                        ...                                1   \n",
       "1333        1                        ...                                1   \n",
       "1352        1                        ...                                1   \n",
       "1353        0                        ...                                0   \n",
       "1354        0                        ...                                1   \n",
       "1355        0                        ...                                0   \n",
       "1356        1                        ...                                1   \n",
       "1361        0                        ...                                0   \n",
       "1363        1                        ...                                0   \n",
       "1364        1                        ...                                1   \n",
       "1365        0                        ...                                0   \n",
       "1366        0                        ...                                0   \n",
       "1381        1                        ...                                1   \n",
       "1382        1                        ...                                1   \n",
       "1388        0                        ...                                1   \n",
       "1390        0                        ...                                1   \n",
       "1391        0                        ...                                0   \n",
       "1392        0                        ...                                1   \n",
       "1393        0                        ...                                1   \n",
       "1396        0                        ...                                1   \n",
       "1397        1                        ...                                1   \n",
       "\n",
       "      SIDER21  SIDER22  SIDER23  SIDER24  SIDER25  SIDER26  SIDER27  \\\n",
       "5           1        1        0        0        1        1        1   \n",
       "29          1        1        0        1        1        1        1   \n",
       "41          0        0        0        0        0        0        0   \n",
       "47          0        0        0        0        0        1        1   \n",
       "50          1        0        0        0        1        1        1   \n",
       "51          1        0        0        0        1        1        0   \n",
       "53          0        0        0        0        0        0        0   \n",
       "59          0        0        0        0        0        0        0   \n",
       "126         1        0        1        0        1        1        1   \n",
       "136         0        0        0        0        0        1        1   \n",
       "139         1        1        0        0        1        1        1   \n",
       "148         0        0        0        0        0        0        0   \n",
       "149         1        0        0        0        1        1        0   \n",
       "162         0        0        0        0        1        1        0   \n",
       "182         1        1        1        1        0        1        1   \n",
       "189         1        1        0        0        1        1        1   \n",
       "197         1        1        0        0        1        1        1   \n",
       "201         1        1        0        0        1        1        1   \n",
       "207         0        0        0        0        0        0        0   \n",
       "215         0        0        0        0        0        0        0   \n",
       "251         1        1        0        1        1        1        1   \n",
       "291         1        0        0        1        1        1        1   \n",
       "294         0        0        0        0        1        1        0   \n",
       "309         0        0        0        0        1        1        0   \n",
       "334         0        1        0        0        1        1        1   \n",
       "336         1        1        0        0        1        1        1   \n",
       "338         0        0        0        0        0        1        1   \n",
       "424         0        0        0        0        0        0        0   \n",
       "627         0        1        0        0        1        1        0   \n",
       "723         1        0        0        0        1        1        0   \n",
       "...       ...      ...      ...      ...      ...      ...      ...   \n",
       "740         0        1        0        0        0        0        0   \n",
       "745         1        1        0        1        1        1        1   \n",
       "977         0        1        0        0        1        1        1   \n",
       "1058        0        0        0        0        1        0        0   \n",
       "1120        1        0        0        0        1        1        1   \n",
       "1223        1        1        1        0        1        1        1   \n",
       "1243        1        0        0        1        1        1        0   \n",
       "1297        1        1        0        1        1        1        1   \n",
       "1300        0        0        0        0        0        0        1   \n",
       "1311        1        1        0        1        1        1        1   \n",
       "1333        1        1        1        1        1        1        1   \n",
       "1352        1        1        0        1        1        1        1   \n",
       "1353        0        0        0        0        0        0        1   \n",
       "1354        0        1        0        0        1        1        0   \n",
       "1355        0        1        0        0        0        1        0   \n",
       "1356        0        0        0        0        1        1        1   \n",
       "1361        0        0        0        0        0        1        1   \n",
       "1363        0        0        1        0        0        0        0   \n",
       "1364        1        1        0        1        1        1        1   \n",
       "1365        0        0        0        0        0        0        0   \n",
       "1366        0        0        0        0        0        0        0   \n",
       "1381        1        1        0        1        1        1        1   \n",
       "1382        1        0        0        1        1        1        1   \n",
       "1388        1        0        0        0        1        1        1   \n",
       "1390        1        1        0        1        1        1        1   \n",
       "1391        1        1        0        0        1        1        0   \n",
       "1392        1        1        0        1        1        1        1   \n",
       "1393        1        1        0        1        1        1        1   \n",
       "1396        1        1        0        0        0        1        1   \n",
       "1397        1        1        0        1        1        1        1   \n",
       "\n",
       "                                                 smiles  \\\n",
       "5     CC[C@H](C)[C@H]1C(=O)N[C@H]2CSSC[C@@H](C(=O)N[...   \n",
       "29    CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...   \n",
       "41                                                    N   \n",
       "47    CC[C@H](C)[C@@H](C(=O)N[C@@H](CC1=CC=C(C=C1)O)...   \n",
       "50                                                    I   \n",
       "51                                               [Ca+2]   \n",
       "53                                                    C   \n",
       "59                                                [Cl-]   \n",
       "126                                                [K+]   \n",
       "136                                              [Mg+2]   \n",
       "139                                               [Na+]   \n",
       "148                                               [OH-]   \n",
       "149                                     FS(F)(F)(F)(F)F   \n",
       "162   CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...   \n",
       "182   C[C@H]([C@@H](C(=O)N[C@@H](CC(C)C)C(=O)N[C@@H]...   \n",
       "189   CC[C@H](C)[C@@H](C(=O)N[C@@H]([C@@H](C)O)C(=O)...   \n",
       "197   CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@H](C(=O)N...   \n",
       "201   CC[C@H](C)[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[...   \n",
       "207                                    [H-].[H-].[Ba+2]   \n",
       "215                                    [H-].[H-].[Sr+2]   \n",
       "251                                                [As]   \n",
       "291   C[C@@H](C(=O)N[C@@H](CCC(=O)O)C(=O)N[C@@H](CC(...   \n",
       "294   C[C@H]([C@@H](C(=O)N[C@@H](CC1=CC=CC=C1)C(=O)N...   \n",
       "309   CC[C@H](C)[C@@H](C(=O)N[C@@H](CO)C(=O)N[C@@H](...   \n",
       "334   CC[C@H](C)[C@@H](C(=O)N[C@@H](C(C)C)C(=O)N[C@@...   \n",
       "336   CC[C@H](C)[C@@H](C(=O)N[C@@H](CC(C)C)C(=O)NCC(...   \n",
       "338   CC[C@H](C)[C@@H](C(=O)N[C@@H](CC(C)C)C(=O)N1CC...   \n",
       "424                                                [Se]   \n",
       "627   CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)N[C@@H](C(=O)...   \n",
       "723   C[C@H]([C@@H](C(=O)N[C@@H](CC1=CC=CC=C1)C(=O)N...   \n",
       "...                                                 ...   \n",
       "740                                                [Ra]   \n",
       "745                                         [Li+].[OH-]   \n",
       "977   CCCCCCCCCCCCCCCC(=O)N[C@@H](CCC(=O)NCCCC[C@@H]...   \n",
       "1058                                         [Na+].[I-]   \n",
       "1120  CC/C=C/CC(=O)N[C@@H](CC1=CC=C(C=C1)O)C(=O)N[C@...   \n",
       "1223  CC[C@H](C)[C@@H](C(=O)N[C@@H](C(C)C)C(=O)N[C@@...   \n",
       "1243                  [O-2].[O-2].[O-2].[249Cf].[249Cf]   \n",
       "1297                    [O-2].[O-2].[O-2].[As+3].[As+3]   \n",
       "1300                                                  F   \n",
       "1311                                             [Ga+3]   \n",
       "1333  CCCCCCCCCCCCCC(=O)NCCCC[C@@H](C(=O)O)NC(=O)[C@...   \n",
       "1352                                               [Fe]   \n",
       "1353                                               [La]   \n",
       "1354                                               [Sm]   \n",
       "1355                                               [Ag]   \n",
       "1356                                               [Tc]   \n",
       "1361                                               [Cr]   \n",
       "1363                                               [Cu]   \n",
       "1364                                               [Gd]   \n",
       "1365                                                [Y]   \n",
       "1366                                               [Zn]   \n",
       "1381  CC(C)CC1C(=O)NC(C(=O)NC(C(=O)NC(CSSCC(C(=O)NC(...   \n",
       "1382  CC(C)[C@@H](C(=O)NCC(=O)N[C@H](CCCCN)C(=O)N[C@...   \n",
       "1388  CC(C)CC(C(=O)NC(C(C)C)C(=O)O)NC(=O)CNC(=O)C(CC...   \n",
       "1390  CC[C@H](C)[C@H]1C(=O)N[C@H]2CSSC[C@@H](C(=O)N[...   \n",
       "1391  CC1=CN(C(=O)NC1=O)[C@H]2C[C@@H]([C@H](O2)COP(=...   \n",
       "1392  CCC(C)C(C(=O)NC(CCC(=O)N)C(=O)NC(CC(C)C)C(=O)N...   \n",
       "1393  CC1C(=O)NC(C(=O)NC2CSSCC3C(=O)NC(C(=O)NC(C(=O)...   \n",
       "1396  CC[C@H](C)[C@H]1C(=O)N[C@H](C(=O)NCC(=O)N[C@H]...   \n",
       "1397  CC[C@H](C)[C@@H](C(=O)N1CCC[C@H]1C(=O)N[C@@H](...   \n",
       "\n",
       "                                            cano_smiles  \n",
       "5     CC[C@H](C)[C@H](NC(=O)CN)C(=O)N[C@H](C(=O)N[C@...  \n",
       "29    CC[C@H](C)[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@H]...  \n",
       "41                                                    N  \n",
       "47    CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...  \n",
       "50                                                    I  \n",
       "51                                               [Ca+2]  \n",
       "53                                                    C  \n",
       "59                                                [Cl-]  \n",
       "126                                                [K+]  \n",
       "136                                              [Mg+2]  \n",
       "139                                               [Na+]  \n",
       "148                                               [OH-]  \n",
       "149                                     FS(F)(F)(F)(F)F  \n",
       "162   CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...  \n",
       "182   CSCC[C@H](N)C(=O)N[C@@H](CCC(N)=O)C(=O)N[C@@H]...  \n",
       "189   CC[C@H](C)[C@H](NC(=O)[C@H](Cc1ccccc1)NC(=O)[C...  \n",
       "197   CC[C@H](C)[C@@H]1NC(=O)[C@H](CCCNC(=N)N)NC(=O)...  \n",
       "201   CC[C@H](C)[C@H](NC(=O)[C@@H](Cc1ccccc1)NC(=O)[...  \n",
       "207                                    [Ba+2].[H-].[H-]  \n",
       "215                                    [H-].[H-].[Sr+2]  \n",
       "251                                                [As]  \n",
       "291   CSCC[C@H](NC(=O)[C@H](CO)NC(=O)[C@H](Cc1ccc(O)...  \n",
       "294   CSCC[C@H](NC(=O)[C@H](CC(C)C)NC(=O)[C@H](Cc1c[...  \n",
       "309   CC[C@H](C)[C@H](NC(=O)[C@H](CC(=O)O)NC(=O)[C@H...  \n",
       "334   CC(C)C[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCC(=O)O...  \n",
       "336   CC[C@H](C)[C@H](NC(=O)[C@H](CS)NC(=O)[C@H](CCC...  \n",
       "338   CC[C@H](C)[C@H](NC(=O)[C@@H]1CCCN1C(=O)CNC(=O)...  \n",
       "424                                                [Se]  \n",
       "627   CC[C@H](C)[C@@H]1NC(=O)[C@H](CCCNC(=N)N)NC(=O)...  \n",
       "723   CC(=O)O.CC(C)C[C@H](NC(=O)CNC(=O)[C@H](CCC(N)=...  \n",
       "...                                                 ...  \n",
       "740                                                [Ra]  \n",
       "745                                         [Li+].[OH-]  \n",
       "977   CCCCCCCCCCCCCCCC(=O)N[C@@H](CCC(=O)NCCCC[C@H](...  \n",
       "1058                                         [I-].[Na+]  \n",
       "1120  CC/C=C/CC(=O)N[C@@H](Cc1ccc(O)cc1)C(=O)N[C@@H]...  \n",
       "1223  CC(C)C[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCC(=O)O...  \n",
       "1243                  [249Cf].[249Cf].[O-2].[O-2].[O-2]  \n",
       "1297                    [As+3].[As+3].[O-2].[O-2].[O-2]  \n",
       "1300                                                  F  \n",
       "1311                                             [Ga+3]  \n",
       "1333  CCCCCCCCCCCCCC(=O)NCCCC[C@H](NC(=O)[C@@H]1CCCN...  \n",
       "1352                                               [Fe]  \n",
       "1353                                               [La]  \n",
       "1354                                               [Sm]  \n",
       "1355                                               [Ag]  \n",
       "1356                                               [Tc]  \n",
       "1361                                               [Cr]  \n",
       "1363                                               [Cu]  \n",
       "1364                                               [Gd]  \n",
       "1365                                                [Y]  \n",
       "1366                                               [Zn]  \n",
       "1381  CC(C)CC(NC(=O)C(CCCCN)NC(=O)CNC(=O)C(CC(C)C)NC...  \n",
       "1382  CSCC[C@H](NC(=O)[C@H](CO)NC(=O)[C@H](Cc1ccc(O)...  \n",
       "1388  CC(C)CC(NC(=O)CNC(=O)C(CCC(N)=O)NC(=O)C(CC(C)C...  \n",
       "1390  CC[C@H](C)[C@H](NC(=O)CN)C(=O)N[C@H](C(=O)N[C@...  \n",
       "1391  COCCO[C@H]1[C@@H](O)[C@H](COP(=O)([O-])S[C@H]2...  \n",
       "1392  CCC(C)C(NC(=O)C(CCC(=O)O)NC(=O)C(CO)NC(=O)C(NC...  \n",
       "1393  CSCCC1NC(=O)C(CC(C)C)NC(=O)C(CCCNC(=N)N)NC(=O)...  \n",
       "1396  CC[C@H](C)[C@@H]1NC(=O)[C@@H]2CSSC[C@H](NC(=O)...  \n",
       "1397  CC[C@H](C)[C@H](NC(=O)[C@H](CCC(=O)O)NC(=O)[C@...  \n",
       "\n",
       "[61 rows x 29 columns]"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smilesList = [smiles for smiles in canonical_smiles_list if len(Chem.MolFromSmiles(smiles).GetAtoms())<151]\n",
    "\n",
    "if os.path.isfile(feature_filename):\n",
    "    feature_dicts = pickle.load(open(feature_filename, \"rb\" ))\n",
    "else:\n",
    "    feature_dicts = save_smiles_dicts(smilesList,filename)\n",
    "# feature_dicts = get_smiles_dicts(smilesList)\n",
    "\n",
    "remained_df = smiles_tasks_df[smiles_tasks_df[\"cano_smiles\"].isin(feature_dicts['smiles_to_atom_mask'].keys())]\n",
    "uncovered_df = smiles_tasks_df.drop(remained_df.index)\n",
    "uncovered_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "weights = []\n",
    "for i,task in enumerate(tasks):    \n",
    "    negative_df = remained_df[remained_df[task] == 0][[\"smiles\",task]]\n",
    "    positive_df = remained_df[remained_df[task] == 1][[\"smiles\",task]]\n",
    "    weights.append([(positive_df.shape[0]+negative_df.shape[0])/negative_df.shape[0],\\\n",
    "                    (positive_df.shape[0]+negative_df.shape[0])/positive_df.shape[0]])\n",
    "\n",
    "test_df = remained_df.sample(frac=1/10, random_state=3) # test set\n",
    "training_data = remained_df.drop(test_df.index) # training data\n",
    "\n",
    "# training data is further divided into validation set and train set\n",
    "valid_df = training_data.sample(frac=1/9, random_state=3) # validation set\n",
    "train_df = training_data.drop(valid_df.index) # train set\n",
    "train_df = train_df.reset_index(drop=True)\n",
    "valid_df = valid_df.reset_index(drop=True)\n",
    "test_df = test_df.reset_index(drop=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1156058\n",
      "atom_fc.weight torch.Size([200, 39])\n",
      "atom_fc.bias torch.Size([200])\n",
      "neighbor_fc.weight torch.Size([200, 49])\n",
      "neighbor_fc.bias torch.Size([200])\n",
      "GRUCell.0.weight_ih torch.Size([600, 200])\n",
      "GRUCell.0.weight_hh torch.Size([600, 200])\n",
      "GRUCell.0.bias_ih torch.Size([600])\n",
      "GRUCell.0.bias_hh torch.Size([600])\n",
      "GRUCell.1.weight_ih torch.Size([600, 200])\n",
      "GRUCell.1.weight_hh torch.Size([600, 200])\n",
      "GRUCell.1.bias_ih torch.Size([600])\n",
      "GRUCell.1.bias_hh torch.Size([600])\n",
      "GRUCell.2.weight_ih torch.Size([600, 200])\n",
      "GRUCell.2.weight_hh torch.Size([600, 200])\n",
      "GRUCell.2.bias_ih torch.Size([600])\n",
      "GRUCell.2.bias_hh torch.Size([600])\n",
      "align.0.weight torch.Size([1, 400])\n",
      "align.0.bias torch.Size([1])\n",
      "align.1.weight torch.Size([1, 400])\n",
      "align.1.bias torch.Size([1])\n",
      "align.2.weight torch.Size([1, 400])\n",
      "align.2.bias torch.Size([1])\n",
      "attend.0.weight torch.Size([200, 200])\n",
      "attend.0.bias torch.Size([200])\n",
      "attend.1.weight torch.Size([200, 200])\n",
      "attend.1.bias torch.Size([200])\n",
      "attend.2.weight torch.Size([200, 200])\n",
      "attend.2.bias torch.Size([200])\n",
      "mol_GRUCell.weight_ih torch.Size([600, 200])\n",
      "mol_GRUCell.weight_hh torch.Size([600, 200])\n",
      "mol_GRUCell.bias_ih torch.Size([600])\n",
      "mol_GRUCell.bias_hh torch.Size([600])\n",
      "mol_align.weight torch.Size([1, 400])\n",
      "mol_align.bias torch.Size([1])\n",
      "mol_attend.weight torch.Size([200, 200])\n",
      "mol_attend.bias torch.Size([200])\n",
      "output.weight torch.Size([54, 200])\n",
      "output.bias torch.Size([54])\n"
     ]
    }
   ],
   "source": [
    "x_atom, x_bonds, x_atom_index, x_bond_index, x_mask, smiles_to_rdkit_list = get_smiles_array([canonical_smiles_list[0]],feature_dicts)\n",
    "num_atom_features = x_atom.shape[-1]\n",
    "num_bond_features = x_bonds.shape[-1]\n",
    "\n",
    "loss_function = [nn.CrossEntropyLoss(torch.Tensor(weight),reduction='mean') for weight in weights]\n",
    "model = Fingerprint(radius, T, num_atom_features,num_bond_features,\n",
    "            fingerprint_dim, output_units_num, p_dropout)\n",
    "model.cuda()\n",
    "# tensorboard = SummaryWriter(log_dir=\"runs/\"+start_time+\"_\"+prefix_filename+\"_\"+str(fingerprint_dim)+\"_\"+str(p_dropout))\n",
    "\n",
    "# optimizer = optim.Adam(model.parameters(), learning_rate, weight_decay=weight_decay)\n",
    "optimizer = optim.Adam(model.parameters(), 10**-learning_rate, weight_decay=10**-weight_decay)\n",
    "model_parameters = filter(lambda p: p.requires_grad, model.parameters())\n",
    "params = sum([np.prod(p.size()) for p in model_parameters])\n",
    "print(params)\n",
    "for name, param in model.named_parameters():\n",
    "    if param.requires_grad:\n",
    "        print(name, param.data.shape)\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(model, dataset, optimizer, loss_function):\n",
    "    model.train()\n",
    "    np.random.seed(epoch)\n",
    "    valList = np.arange(0,dataset.shape[0])\n",
    "    #shuffle them\n",
    "    np.random.shuffle(valList)\n",
    "    batch_list = []\n",
    "    for i in range(0, dataset.shape[0], batch_size):\n",
    "        batch = valList[i:i+batch_size]\n",
    "        batch_list.append(batch)   \n",
    "    for counter, train_batch in enumerate(batch_list):\n",
    "        batch_df = dataset.loc[train_batch,:]\n",
    "        smiles_list = batch_df.cano_smiles.values\n",
    "        \n",
    "        x_atom, x_bonds, x_atom_index, x_bond_index, x_mask, smiles_to_rdkit_list = get_smiles_array(smiles_list,feature_dicts)\n",
    "        atoms_prediction, mol_prediction = model(torch.Tensor(x_atom),torch.Tensor(x_bonds),torch.cuda.LongTensor(x_atom_index),torch.cuda.LongTensor(x_bond_index),torch.Tensor(x_mask))\n",
    "#         print(torch.Tensor(x_atom).size(),torch.Tensor(x_bonds).size(),torch.cuda.LongTensor(x_atom_index).size(),torch.cuda.LongTensor(x_bond_index).size(),torch.Tensor(x_mask).size())\n",
    "        \n",
    "        optimizer.zero_grad()\n",
    "        loss = 0.0\n",
    "        for i,task in enumerate(tasks):\n",
    "            y_pred = mol_prediction[:, i * per_task_output_units_num:(i + 1) *\n",
    "                                    per_task_output_units_num]\n",
    "            y_val = batch_df[task].values\n",
    "\n",
    "            validInds = np.where((y_val==0) | (y_val==1))[0]\n",
    "#             validInds = np.where(y_val != -1)[0]\n",
    "            if len(validInds) == 0:\n",
    "                continue\n",
    "            y_val_adjust = np.array([y_val[v] for v in validInds]).astype(float)\n",
    "            validInds = torch.cuda.LongTensor(validInds).squeeze()\n",
    "            y_pred_adjust = torch.index_select(y_pred, 0, validInds)\n",
    "\n",
    "            loss += loss_function[i](\n",
    "                y_pred_adjust,\n",
    "                torch.cuda.LongTensor(y_val_adjust))\n",
    "        # Step 5. Do the backward pass and update the gradient\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "def eval(model, dataset):\n",
    "    model.eval()\n",
    "    y_val_list = {}\n",
    "    y_pred_list = {}\n",
    "    losses_list = []\n",
    "    valList = np.arange(0,dataset.shape[0])\n",
    "    batch_list = []\n",
    "    for i in range(0, dataset.shape[0], batch_size):\n",
    "        batch = valList[i:i+batch_size]\n",
    "        batch_list.append(batch)   \n",
    "    for counter, eval_batch in enumerate(batch_list):\n",
    "        batch_df = dataset.loc[eval_batch,:]\n",
    "        smiles_list = batch_df.cano_smiles.values\n",
    "        \n",
    "        x_atom, x_bonds, x_atom_index, x_bond_index, x_mask, smiles_to_rdkit_list = get_smiles_array(smiles_list,feature_dicts)\n",
    "        atoms_prediction, mol_prediction = model(torch.Tensor(x_atom),torch.Tensor(x_bonds),torch.cuda.LongTensor(x_atom_index),torch.cuda.LongTensor(x_bond_index),torch.Tensor(x_mask))\n",
    "        atom_pred = atoms_prediction.data[:,:,1].unsqueeze(2).cpu().numpy()\n",
    "        for i,task in enumerate(tasks):\n",
    "            y_pred = mol_prediction[:, i * per_task_output_units_num:(i + 1) *\n",
    "                                    per_task_output_units_num]\n",
    "            y_val = batch_df[task].values\n",
    "\n",
    "            validInds = np.where((y_val==0) | (y_val==1))[0]\n",
    "#             validInds = np.where((y_val=='0') | (y_val=='1'))[0]\n",
    "#             print(validInds)\n",
    "            if len(validInds) == 0:\n",
    "                continue\n",
    "            y_val_adjust = np.array([y_val[v] for v in validInds]).astype(float)\n",
    "            validInds = torch.cuda.LongTensor(validInds).squeeze()\n",
    "            y_pred_adjust = torch.index_select(y_pred, 0, validInds)\n",
    "#             print(validInds)\n",
    "            loss = loss_function[i](\n",
    "                y_pred_adjust,\n",
    "                torch.cuda.LongTensor(y_val_adjust))\n",
    "#             print(y_pred_adjust)\n",
    "            y_pred_adjust = F.softmax(y_pred_adjust,dim=-1).data.cpu().numpy()[:,1]\n",
    "            losses_list.append(loss.cpu().detach().numpy())\n",
    "            try:\n",
    "                y_val_list[i].extend(y_val_adjust)\n",
    "                y_pred_list[i].extend(y_pred_adjust)\n",
    "            except:\n",
    "                y_val_list[i] = []\n",
    "                y_pred_list[i] = []\n",
    "                y_val_list[i].extend(y_val_adjust)\n",
    "                y_pred_list[i].extend(y_pred_adjust)\n",
    "                \n",
    "    eval_roc = [roc_auc_score(y_val_list[i], y_pred_list[i]) for i in range(len(tasks))]\n",
    "#     eval_prc = [auc(precision_recall_curve(y_val_list[i], y_pred_list[i])[1],precision_recall_curve(y_val_list[i], y_pred_list[i])[0]) for i in range(len(tasks))]\n",
    "#     eval_precision = [precision_score(y_val_list[i],\n",
    "#                                      (np.array(y_pred_list[i]) > 0.5).astype(int)) for i in range(len(tasks))]\n",
    "#     eval_recall = [recall_score(y_val_list[i],\n",
    "#                                (np.array(y_pred_list[i]) > 0.5).astype(int)) for i in range(len(tasks))]\n",
    "    eval_loss = np.array(losses_list).mean()\n",
    "    \n",
    "    return eval_roc, eval_loss #eval_prc, eval_precision, eval_recall, \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EPOCH:\t0\n",
      "train_roc_mean:0.4745025840874473\n",
      "valid_roc_mean:0.49017626895638255\n",
      "\n",
      "EPOCH:\t1\n",
      "train_roc_mean:0.5548504711703001\n",
      "valid_roc_mean:0.528534445968552\n",
      "\n",
      "EPOCH:\t2\n",
      "train_roc_mean:0.5649917830387543\n",
      "valid_roc_mean:0.5229617837093704\n",
      "\n",
      "EPOCH:\t3\n",
      "train_roc_mean:0.5718834888386992\n",
      "valid_roc_mean:0.5277455751285598\n",
      "\n",
      "EPOCH:\t4\n",
      "train_roc_mean:0.5824133676170438\n",
      "valid_roc_mean:0.535151078247592\n",
      "\n",
      "EPOCH:\t5\n",
      "train_roc_mean:0.5845688851188364\n",
      "valid_roc_mean:0.5423824630612689\n",
      "\n",
      "EPOCH:\t6\n",
      "train_roc_mean:0.5837590104789574\n",
      "valid_roc_mean:0.5371161181439686\n",
      "\n",
      "EPOCH:\t7\n",
      "train_roc_mean:0.5892767800159936\n",
      "valid_roc_mean:0.5415209263787256\n",
      "\n",
      "EPOCH:\t8\n",
      "train_roc_mean:0.5926493411653994\n",
      "valid_roc_mean:0.5422553856756425\n",
      "\n",
      "EPOCH:\t9\n",
      "train_roc_mean:0.5946740922895939\n",
      "valid_roc_mean:0.5545765182739005\n",
      "\n",
      "EPOCH:\t10\n",
      "train_roc_mean:0.5984943745384655\n",
      "valid_roc_mean:0.5549391837269181\n",
      "\n",
      "EPOCH:\t11\n",
      "train_roc_mean:0.6010380051390631\n",
      "valid_roc_mean:0.5570609489627487\n",
      "\n",
      "EPOCH:\t12\n",
      "train_roc_mean:0.6042521506388091\n",
      "valid_roc_mean:0.5372209302167309\n",
      "\n",
      "EPOCH:\t13\n",
      "train_roc_mean:0.6083760623911004\n",
      "valid_roc_mean:0.5443943406564106\n",
      "\n",
      "EPOCH:\t14\n",
      "train_roc_mean:0.6126465711820075\n",
      "valid_roc_mean:0.553809684938379\n",
      "\n",
      "EPOCH:\t15\n",
      "train_roc_mean:0.6177275755303303\n",
      "valid_roc_mean:0.5556220080804531\n",
      "\n",
      "EPOCH:\t16\n",
      "train_roc_mean:0.6225250750519313\n",
      "valid_roc_mean:0.548368478712315\n",
      "\n",
      "EPOCH:\t17\n",
      "train_roc_mean:0.6225347375478391\n",
      "valid_roc_mean:0.5525858025371114\n",
      "\n",
      "EPOCH:\t18\n",
      "train_roc_mean:0.6305608625134183\n",
      "valid_roc_mean:0.5542906487122935\n",
      "\n",
      "EPOCH:\t19\n",
      "train_roc_mean:0.6364730170841484\n",
      "valid_roc_mean:0.5432425494675794\n",
      "\n",
      "EPOCH:\t20\n",
      "train_roc_mean:0.6419739814151639\n",
      "valid_roc_mean:0.5447097177623013\n",
      "\n",
      "EPOCH:\t21\n",
      "train_roc_mean:0.6483565899307101\n",
      "valid_roc_mean:0.5447916987611734\n",
      "\n",
      "EPOCH:\t22\n",
      "train_roc_mean:0.6549258383076647\n",
      "valid_roc_mean:0.5432221829023682\n",
      "\n",
      "EPOCH:\t23\n",
      "train_roc_mean:0.6594190709669034\n",
      "valid_roc_mean:0.5381854010597923\n",
      "\n",
      "EPOCH:\t24\n",
      "train_roc_mean:0.6642534192238094\n",
      "valid_roc_mean:0.5409561011365711\n",
      "\n",
      "EPOCH:\t25\n",
      "train_roc_mean:0.6639020450626579\n",
      "valid_roc_mean:0.5459545011290419\n",
      "\n",
      "EPOCH:\t26\n",
      "train_roc_mean:0.6654176182121198\n",
      "valid_roc_mean:0.5491423944359721\n",
      "\n",
      "EPOCH:\t27\n",
      "train_roc_mean:0.6734900093323728\n",
      "valid_roc_mean:0.5427424544495488\n",
      "\n",
      "EPOCH:\t28\n",
      "train_roc_mean:0.6779779758949653\n",
      "valid_roc_mean:0.5518636428404091\n",
      "\n",
      "EPOCH:\t29\n",
      "train_roc_mean:0.6760112231636368\n",
      "valid_roc_mean:0.5531542215036146\n",
      "\n",
      "EPOCH:\t30\n",
      "train_roc_mean:0.6844659837348238\n",
      "valid_roc_mean:0.5553111788124618\n",
      "\n",
      "EPOCH:\t31\n",
      "train_roc_mean:0.6851315396007143\n",
      "valid_roc_mean:0.5544016528920244\n",
      "\n",
      "EPOCH:\t32\n",
      "train_roc_mean:0.6904007659126628\n",
      "valid_roc_mean:0.5537853772476881\n",
      "\n",
      "EPOCH:\t33\n",
      "train_roc_mean:0.6934677634977953\n",
      "valid_roc_mean:0.5551200408416971\n",
      "\n",
      "EPOCH:\t34\n",
      "train_roc_mean:0.6961132709716603\n",
      "valid_roc_mean:0.5572283690727098\n",
      "\n",
      "EPOCH:\t35\n",
      "train_roc_mean:0.6992553505237644\n",
      "valid_roc_mean:0.5568953596447268\n",
      "\n",
      "EPOCH:\t36\n",
      "train_roc_mean:0.69936508081539\n",
      "valid_roc_mean:0.5464637520007989\n",
      "\n",
      "EPOCH:\t37\n",
      "train_roc_mean:0.7008328895784902\n",
      "valid_roc_mean:0.5710126074218023\n",
      "\n",
      "EPOCH:\t38\n",
      "train_roc_mean:0.7055048961250955\n",
      "valid_roc_mean:0.5633244303017639\n",
      "\n",
      "EPOCH:\t39\n",
      "train_roc_mean:0.710058221374326\n",
      "valid_roc_mean:0.5605920957568249\n",
      "\n",
      "EPOCH:\t40\n",
      "train_roc_mean:0.7140468427795925\n",
      "valid_roc_mean:0.5569805460143278\n",
      "\n",
      "EPOCH:\t41\n",
      "train_roc_mean:0.7156543454396297\n",
      "valid_roc_mean:0.557145560340534\n",
      "\n",
      "EPOCH:\t42\n",
      "train_roc_mean:0.7155235540425969\n",
      "valid_roc_mean:0.5639916903958502\n",
      "\n",
      "EPOCH:\t43\n",
      "train_roc_mean:0.7182071610345511\n",
      "valid_roc_mean:0.5574281581588963\n",
      "\n",
      "EPOCH:\t44\n",
      "train_roc_mean:0.7243573079730645\n",
      "valid_roc_mean:0.5563187231954384\n",
      "\n",
      "EPOCH:\t45\n",
      "train_roc_mean:0.7220066929948267\n",
      "valid_roc_mean:0.5617732938808749\n",
      "\n",
      "EPOCH:\t46\n",
      "train_roc_mean:0.7269337997010908\n",
      "valid_roc_mean:0.5724653664334868\n",
      "\n",
      "EPOCH:\t47\n",
      "train_roc_mean:0.731426984246104\n",
      "valid_roc_mean:0.5599795113395394\n",
      "\n",
      "EPOCH:\t48\n",
      "train_roc_mean:0.7331084046131351\n",
      "valid_roc_mean:0.5558179071036885\n",
      "\n",
      "EPOCH:\t49\n",
      "train_roc_mean:0.73611063712176\n",
      "valid_roc_mean:0.5657057150628405\n",
      "\n",
      "EPOCH:\t50\n",
      "train_roc_mean:0.7382631014924429\n",
      "valid_roc_mean:0.5752600691309534\n",
      "\n",
      "EPOCH:\t51\n",
      "train_roc_mean:0.7410241926667\n",
      "valid_roc_mean:0.5659700637508677\n",
      "\n",
      "EPOCH:\t52\n",
      "train_roc_mean:0.7409890589168339\n",
      "valid_roc_mean:0.5687616216542852\n",
      "\n",
      "EPOCH:\t53\n",
      "train_roc_mean:0.7424576413879659\n",
      "valid_roc_mean:0.5663453867633658\n",
      "\n",
      "EPOCH:\t54\n",
      "train_roc_mean:0.7471264648303256\n",
      "valid_roc_mean:0.5659399521508134\n",
      "\n",
      "EPOCH:\t55\n",
      "train_roc_mean:0.7511655970812372\n",
      "valid_roc_mean:0.578933127774083\n",
      "\n",
      "EPOCH:\t56\n",
      "train_roc_mean:0.753513850354316\n",
      "valid_roc_mean:0.5769225043785882\n",
      "\n",
      "EPOCH:\t57\n",
      "train_roc_mean:0.7564249172834677\n",
      "valid_roc_mean:0.5758735040302296\n",
      "\n",
      "EPOCH:\t58\n",
      "train_roc_mean:0.755903618197074\n",
      "valid_roc_mean:0.5829829441713261\n",
      "\n",
      "EPOCH:\t59\n",
      "train_roc_mean:0.7547377400660668\n",
      "valid_roc_mean:0.572690314796294\n",
      "\n",
      "EPOCH:\t60\n",
      "train_roc_mean:0.7603659168835312\n",
      "valid_roc_mean:0.5783287599289836\n",
      "\n",
      "EPOCH:\t61\n",
      "train_roc_mean:0.7629946542122569\n",
      "valid_roc_mean:0.588543918593444\n",
      "\n",
      "EPOCH:\t62\n",
      "train_roc_mean:0.76556087441961\n",
      "valid_roc_mean:0.5801355220921637\n",
      "\n",
      "EPOCH:\t63\n",
      "train_roc_mean:0.7672546774693032\n",
      "valid_roc_mean:0.5811369504141469\n",
      "\n",
      "EPOCH:\t64\n",
      "train_roc_mean:0.7671914016729704\n",
      "valid_roc_mean:0.5941955617937149\n",
      "\n",
      "EPOCH:\t65\n",
      "train_roc_mean:0.7715964934738239\n",
      "valid_roc_mean:0.5757190601359996\n",
      "\n",
      "EPOCH:\t66\n",
      "train_roc_mean:0.7753416310907022\n",
      "valid_roc_mean:0.5916836799522395\n",
      "\n",
      "EPOCH:\t67\n",
      "train_roc_mean:0.7762843183468012\n",
      "valid_roc_mean:0.5856578044674033\n",
      "\n",
      "EPOCH:\t68\n",
      "train_roc_mean:0.7765011314636451\n",
      "valid_roc_mean:0.5925860650081384\n",
      "\n",
      "EPOCH:\t69\n",
      "train_roc_mean:0.7824419091956768\n",
      "valid_roc_mean:0.5872111251760237\n",
      "\n",
      "EPOCH:\t70\n",
      "train_roc_mean:0.7840474049437075\n",
      "valid_roc_mean:0.5824187410984552\n",
      "\n",
      "EPOCH:\t71\n",
      "train_roc_mean:0.7837645240383315\n",
      "valid_roc_mean:0.5866069641472953\n",
      "\n",
      "EPOCH:\t72\n",
      "train_roc_mean:0.7884409440924307\n",
      "valid_roc_mean:0.5923002519678208\n",
      "\n",
      "EPOCH:\t73\n",
      "train_roc_mean:0.7894063656818678\n",
      "valid_roc_mean:0.5870669249341739\n",
      "\n",
      "EPOCH:\t74\n",
      "train_roc_mean:0.7845833686354332\n",
      "valid_roc_mean:0.5974688462790555\n",
      "\n",
      "EPOCH:\t75\n",
      "train_roc_mean:0.7935441093133482\n",
      "valid_roc_mean:0.5896477025541158\n",
      "\n",
      "EPOCH:\t76\n",
      "train_roc_mean:0.7959104640099426\n",
      "valid_roc_mean:0.5983300691597053\n",
      "\n",
      "EPOCH:\t77\n",
      "train_roc_mean:0.7942488387828918\n",
      "valid_roc_mean:0.5893951805934258\n",
      "\n",
      "EPOCH:\t78\n",
      "train_roc_mean:0.7932888885963156\n",
      "valid_roc_mean:0.5835295322179149\n",
      "\n",
      "EPOCH:\t79\n",
      "train_roc_mean:0.7981592986161374\n",
      "valid_roc_mean:0.5996008629466417\n",
      "\n",
      "EPOCH:\t80\n",
      "train_roc_mean:0.8015255255965731\n",
      "valid_roc_mean:0.5934570671923702\n",
      "\n",
      "EPOCH:\t81\n",
      "train_roc_mean:0.8047649767511776\n",
      "valid_roc_mean:0.5957252175475444\n",
      "\n",
      "EPOCH:\t82\n",
      "train_roc_mean:0.8108527254339165\n",
      "valid_roc_mean:0.5973903014548614\n",
      "\n",
      "EPOCH:\t83\n",
      "train_roc_mean:0.8131509162228944\n",
      "valid_roc_mean:0.5867052820125952\n",
      "\n",
      "EPOCH:\t84\n",
      "train_roc_mean:0.8167391720559081\n",
      "valid_roc_mean:0.5969286306122995\n",
      "\n",
      "EPOCH:\t85\n",
      "train_roc_mean:0.8131520343738186\n",
      "valid_roc_mean:0.6011879057235942\n",
      "\n",
      "EPOCH:\t86\n",
      "train_roc_mean:0.816022324095765\n",
      "valid_roc_mean:0.5999323300258159\n",
      "\n",
      "EPOCH:\t87\n",
      "train_roc_mean:0.8158244394557479\n",
      "valid_roc_mean:0.60219284101023\n",
      "\n",
      "EPOCH:\t88\n",
      "train_roc_mean:0.8149378215259241\n",
      "valid_roc_mean:0.6016573607703436\n",
      "\n",
      "EPOCH:\t89\n",
      "train_roc_mean:0.8165878879520008\n",
      "valid_roc_mean:0.5971334198761846\n",
      "\n",
      "EPOCH:\t90\n",
      "train_roc_mean:0.822014717352766\n",
      "valid_roc_mean:0.6017874419849236\n",
      "\n",
      "EPOCH:\t91\n",
      "train_roc_mean:0.8262386575374171\n",
      "valid_roc_mean:0.5990145765387078\n",
      "\n",
      "EPOCH:\t92\n",
      "train_roc_mean:0.8279990884377048\n",
      "valid_roc_mean:0.5990104210201337\n",
      "\n",
      "EPOCH:\t93\n",
      "train_roc_mean:0.8266358302683413\n",
      "valid_roc_mean:0.5988182960582015\n",
      "\n",
      "EPOCH:\t94\n",
      "train_roc_mean:0.8339253241432504\n",
      "valid_roc_mean:0.6002843974733246\n",
      "\n",
      "EPOCH:\t95\n",
      "train_roc_mean:0.8338299645747644\n",
      "valid_roc_mean:0.6010368197907958\n",
      "\n",
      "EPOCH:\t96\n",
      "train_roc_mean:0.8317828080005903\n",
      "valid_roc_mean:0.6043384724213291\n",
      "\n",
      "EPOCH:\t97\n",
      "train_roc_mean:0.8358181112044373\n",
      "valid_roc_mean:0.5989409107731173\n",
      "\n",
      "EPOCH:\t98\n",
      "train_roc_mean:0.8428243110861229\n",
      "valid_roc_mean:0.596442060547809\n",
      "\n",
      "EPOCH:\t99\n",
      "train_roc_mean:0.8453650028509742\n",
      "valid_roc_mean:0.5937166528419234\n",
      "\n",
      "EPOCH:\t100\n",
      "train_roc_mean:0.8411079744540179\n",
      "valid_roc_mean:0.5982072470565382\n",
      "\n",
      "EPOCH:\t101\n",
      "train_roc_mean:0.8463030693434715\n",
      "valid_roc_mean:0.6075826613624199\n",
      "\n",
      "EPOCH:\t102\n",
      "train_roc_mean:0.8512770939483952\n",
      "valid_roc_mean:0.6086667744309447\n",
      "\n",
      "EPOCH:\t103\n",
      "train_roc_mean:0.8539653346501339\n",
      "valid_roc_mean:0.596879820914438\n",
      "\n",
      "EPOCH:\t104\n",
      "train_roc_mean:0.8549296437558043\n",
      "valid_roc_mean:0.6058492251526814\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "EPOCH:\t105\n",
      "train_roc_mean:0.8591249395762136\n",
      "valid_roc_mean:0.6030990392848413\n",
      "\n",
      "EPOCH:\t106\n",
      "train_roc_mean:0.8591906931048839\n",
      "valid_roc_mean:0.6189214521775712\n",
      "\n",
      "EPOCH:\t107\n",
      "train_roc_mean:0.8545885014881497\n",
      "valid_roc_mean:0.6153929403604805\n",
      "\n",
      "EPOCH:\t108\n",
      "train_roc_mean:0.8647012064503735\n",
      "valid_roc_mean:0.5981357606328908\n",
      "\n",
      "EPOCH:\t109\n",
      "train_roc_mean:0.8687554736807924\n",
      "valid_roc_mean:0.6127188135562127\n",
      "\n",
      "EPOCH:\t110\n",
      "train_roc_mean:0.8686585671791933\n",
      "valid_roc_mean:0.5936420976132978\n",
      "\n",
      "EPOCH:\t111\n",
      "train_roc_mean:0.8700043214001637\n",
      "valid_roc_mean:0.6110330551903742\n",
      "\n",
      "EPOCH:\t112\n",
      "train_roc_mean:0.8666604464157637\n",
      "valid_roc_mean:0.6130940581789226\n",
      "\n",
      "EPOCH:\t113\n",
      "train_roc_mean:0.8720517706741424\n",
      "valid_roc_mean:0.6236377103920006\n",
      "\n",
      "EPOCH:\t114\n",
      "train_roc_mean:0.8697058476327478\n",
      "valid_roc_mean:0.594451155514882\n",
      "\n",
      "EPOCH:\t115\n",
      "train_roc_mean:0.8642262139925155\n",
      "valid_roc_mean:0.6029946033411882\n",
      "\n",
      "EPOCH:\t116\n",
      "train_roc_mean:0.8758351723271092\n",
      "valid_roc_mean:0.621082024814408\n",
      "\n",
      "EPOCH:\t117\n",
      "train_roc_mean:0.8802723698338192\n",
      "valid_roc_mean:0.609090015622737\n",
      "\n",
      "EPOCH:\t118\n",
      "train_roc_mean:0.8827940420955446\n",
      "valid_roc_mean:0.6158509353260753\n",
      "\n",
      "EPOCH:\t119\n",
      "train_roc_mean:0.8868605398006771\n",
      "valid_roc_mean:0.6085747198276172\n",
      "\n",
      "EPOCH:\t120\n",
      "train_roc_mean:0.8876958506584176\n",
      "valid_roc_mean:0.6110194352985687\n",
      "\n",
      "EPOCH:\t121\n",
      "train_roc_mean:0.8847708872075866\n",
      "valid_roc_mean:0.6013628331493396\n",
      "\n",
      "EPOCH:\t122\n",
      "train_roc_mean:0.8905091737006915\n",
      "valid_roc_mean:0.6132115602270658\n",
      "\n",
      "EPOCH:\t123\n",
      "train_roc_mean:0.8920211592528554\n",
      "valid_roc_mean:0.6172424142918119\n",
      "\n",
      "EPOCH:\t124\n",
      "train_roc_mean:0.8893902969040676\n",
      "valid_roc_mean:0.6169504063132509\n",
      "\n",
      "EPOCH:\t125\n",
      "train_roc_mean:0.8921560647110603\n",
      "valid_roc_mean:0.60465917175855\n",
      "\n",
      "EPOCH:\t126\n",
      "train_roc_mean:0.8915257046732129\n",
      "valid_roc_mean:0.6114113836039281\n",
      "\n",
      "EPOCH:\t127\n",
      "train_roc_mean:0.8972297430464773\n",
      "valid_roc_mean:0.6091757554292487\n",
      "\n",
      "EPOCH:\t128\n",
      "train_roc_mean:0.8983871558000986\n",
      "valid_roc_mean:0.6094027280669727\n",
      "\n",
      "EPOCH:\t129\n",
      "train_roc_mean:0.8996253300052038\n",
      "valid_roc_mean:0.6160731850465614\n",
      "\n",
      "EPOCH:\t130\n",
      "train_roc_mean:0.9012759012929972\n",
      "valid_roc_mean:0.6063566652375066\n",
      "\n",
      "EPOCH:\t131\n",
      "train_roc_mean:0.9005412559869947\n",
      "valid_roc_mean:0.6121551168762441\n",
      "\n",
      "EPOCH:\t132\n",
      "train_roc_mean:0.9009858399593171\n",
      "valid_roc_mean:0.6134274357400571\n",
      "\n"
     ]
    }
   ],
   "source": [
    "best_param ={}\n",
    "best_param[\"roc_epoch\"] = 0\n",
    "best_param[\"loss_epoch\"] = 0\n",
    "best_param[\"valid_roc\"] = 0\n",
    "best_param[\"valid_loss\"] = 9e8\n",
    "\n",
    "for epoch in range(epochs):    \n",
    "    train_roc, train_loss = eval(model, train_df)\n",
    "    valid_roc, valid_loss = eval(model, valid_df)\n",
    "    train_roc_mean = np.array(train_roc).mean()\n",
    "    valid_roc_mean = np.array(valid_roc).mean()\n",
    "    \n",
    "#     tensorboard.add_scalars('ROC',{'train_roc':train_roc_mean,'valid_roc':valid_roc_mean},epoch)\n",
    "#     tensorboard.add_scalars('Losses',{'train_losses':train_loss,'valid_losses':valid_loss},epoch)\n",
    "\n",
    "    if valid_roc_mean > best_param[\"valid_roc\"]:\n",
    "        best_param[\"roc_epoch\"] = epoch\n",
    "        best_param[\"valid_roc\"] = valid_roc_mean\n",
    "        if valid_roc_mean > 0.62:\n",
    "             torch.save(model, 'saved_models/model_'+prefix_filename+'_'+start_time+'_'+str(epoch)+'.pt')             \n",
    "    if valid_loss < best_param[\"valid_loss\"]:\n",
    "        best_param[\"loss_epoch\"] = epoch\n",
    "        best_param[\"valid_loss\"] = valid_loss\n",
    "\n",
    "    print(\"EPOCH:\\t\"+str(epoch)+'\\n'\\\n",
    "#         +\"train_roc\"+\":\"+str(train_roc)+'\\n'\\\n",
    "#         +\"valid_roc\"+\":\"+str(valid_roc)+'\\n'\\\n",
    "        +\"train_roc_mean\"+\":\"+str(train_roc_mean)+'\\n'\\\n",
    "        +\"valid_roc_mean\"+\":\"+str(valid_roc_mean)+'\\n'\\\n",
    "        )\n",
    "    if (epoch - best_param[\"roc_epoch\"] >18) and (epoch - best_param[\"loss_epoch\"] >28):        \n",
    "        break\n",
    "        \n",
    "    train(model, train_df, optimizer, loss_function)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best epoch:113\n",
      "test_roc:[0.6567996567996568, 0.6291866028708133, 0.44029850746268656, 0.5961290322580646, 0.6551282051282051, 0.5381443298969072, 0.57, 0.6890756302521008, 0.5685714285714285, 0.6615515771526003, 0.7433993399339934, 0.78287841191067, 0.6309771309771309, 0.7702943800178411, 0.5951327433628318, 0.7557971014492755, 0.652605459057072, 0.6689285714285714, 0.6776677667766777, 0.5575675675675675, 0.6004709576138149, 0.6674913409203365, 0.6940836940836941, 0.5696581196581197, 0.6510263929618768, 0.762, 0.5070643642072215]\n",
      "test_roc_mean: 0.640441789345154\n"
     ]
    }
   ],
   "source": [
    "# evaluate model\n",
    "best_model = torch.load('saved_models/model_'+prefix_filename+'_'+start_time+'_'+str(best_param[\"roc_epoch\"])+'.pt')     \n",
    "\n",
    "best_model_dict = best_model.state_dict()\n",
    "best_model_wts = copy.deepcopy(best_model_dict)\n",
    "\n",
    "model.load_state_dict(best_model_wts)\n",
    "(best_model.align[0].weight == model.align[0].weight).all()\n",
    "test_roc, test_losses = eval(model, test_df)\n",
    "\n",
    "print(\"best epoch:\"+str(best_param[\"roc_epoch\"])\n",
    "      +\"\\n\"+\"test_roc:\"+str(test_roc)\n",
    "      +\"\\n\"+\"test_roc_mean:\",str(np.array(test_roc).mean())\n",
    "     )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
